deltas: Add a compression size heuristic for endianness detection
author: Colin Walters <walters@verbum.org>
Thu, 25 Feb 2016 16:07:30 +0000 (11:07 -0500)
committer: Colin Walters <walters@verbum.org>
Fri, 26 Feb 2016 13:19:01 +0000 (08:19 -0500)
When analyzing a delta here, I see that byteswapping produces negative
compression ratios of 540%, 66%, and 28%.  Let's arbitrarily pick 20%
as a threshold for detecting byteswapping.

src/libostree/ostree-repo-static-delta-core.c

index 0669f691e42593a788ab17bb34cd95fe218d4b9e..d84f00193ab0a13d93a9c4dba555d12b7e3621ee 100644 (file)
@@ -705,6 +705,7 @@ _ostree_delta_get_endianness (GVariant *superblock,
   { g_autoptr(GVariant) meta_entries = NULL;
     guint n_parts;
     guint i;
+    gboolean is_byteswapped = FALSE;
 
     g_variant_get_child (superblock, 6, "@a" OSTREE_STATIC_DELTA_META_ENTRY_FORMAT, &meta_entries);
     n_parts = g_variant_n_children (meta_entries);
@@ -721,15 +722,36 @@ _ostree_delta_get_endianness (GVariant *superblock,
         total_objects += n_objects;
         total_size += size;
         total_usize += usize;
+
+        if (size > usize)
+          {
+            double ratio = ((double)size)/((double)usize);
+
+            /* Compressing things should really never make them more than
+             * 20% bigger; treat a larger ratio as a sign that the sizes
+             * were byteswapped.
+             */
+            if (ratio > 1.2)
+              {
+                is_byteswapped = TRUE;
+                break;
+              }
+          }
+      }
+
+    if (!is_byteswapped)
+      {
+        /* If the average object size is greater than 4GiB, let's assume
+         * we're dealing with opposite endianness.  I'm fairly confident
+         * no one is going to be shipping peta- or exa- byte size ostree
+         * deltas, period.  Past the gigabyte scale you really want
+         * bittorrent or something.
+         */
+        if ((total_size / total_objects) > G_MAXUINT32)
+          {
+            is_byteswapped = TRUE;
+          }
       }
 
-    /* If the average object size is greater than 4GiB, let's assume
-     * we're dealing with opposite endianness.  I'm fairly confident
-     * no one is going to be shipping peta- or exa- byte size ostree
-     * deltas, period.  Past the gigabyte scale you really want
-     * bittorrent or something.
-     */
-    if ((total_size / total_objects) > G_MAXUINT32)
+    if (is_byteswapped)
       {
         switch (G_BYTE_ORDER)
           {